Introduction to the SF Package

Jacob Fiksel
March 12, 2019

Goal of the presentation: To make this!

Downloading and reading shape files

  • To plot geographic boundaries, we use shape (.shp) files
  • Download data from the US Census website
  • Choose cb_2017_us_state_20m.zip
library(sf)
usa <- read_sf("data/cb_2017_us_state_20m.shp")  

The sf class

usa
Simple feature collection with 52 features and 9 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
epsg (SRID):    4269
proj4string:    +proj=longlat +datum=NAD83 +no_defs
# A tibble: 52 x 10
   STATEFP STATENS AFFGEOID GEOID STUSPS NAME  LSAD    ALAND  AWATER
   <chr>   <chr>   <chr>    <chr> <chr>  <chr> <chr>   <dbl>   <dbl>
 1 02      017855… 0400000… 02    AK     Alas… 00    1.48e12 2.78e11
 2 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10
 3 08      017797… 0400000… 08    CO     Colo… 00    2.68e11 1.18e 9
 4 11      017023… 0400000… 11    DC     Dist… 00    1.58e 8 1.87e 7
 5 16      017797… 0400000… 16    ID     Idaho 00    2.14e11 2.39e 9
 6 17      017797… 0400000… 17    IL     Illi… 00    1.44e11 6.21e 9
 7 19      017797… 0400000… 19    IA     Iowa  00    1.45e11 1.08e 9
 8 21      017797… 0400000… 21    KY     Kent… 00    1.02e11 2.39e 9
 9 22      016295… 0400000… 22    LA     Loui… 00    1.12e11 2.37e10
10 24      017149… 0400000… 24    MD     Mary… 00    2.52e10 6.98e 9
# … with 42 more rows, and 1 more variable: geometry <MULTIPOLYGON [°]>

The sf class geometry

st_geometry(usa)
Geometry set for 52 features 
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
epsg (SRID):    4269
proj4string:    +proj=longlat +datum=NAD83 +no_defs
First 5 geometries:
MULTIPOLYGON (((-173.0746 60.70466, -172.9126 6...
MULTIPOLYGON (((-118.594 33.4672, -118.4848 33....
MULTIPOLYGON (((-109.06 38.49999, -109.06 38.49...
MULTIPOLYGON (((-77.11976 38.93434, -77.04102 3...
MULTIPOLYGON (((-117.243 44.39097, -117.2151 44...

Simple feature geometry (sfg)

sf objects can be operated on like tibbles

library(tidyverse)
usa_48 <-
    usa %>%
    filter(!(STUSPS %in% c('AK', 'HI', 'PR')))

The sf package has built in spatial-specific functions

state_areas <-
    usa %>%
    mutate(area = st_area(.)) %>%
    st_set_geometry(NULL) %>%
    select(NAME, area) %>%
    arrange(desc(area)) 
state_areas
# A tibble: 52 x 2
   NAME       area              
   <chr>      <S3: units>       
 1 Alaska     1.560670e+12 [m^2]
 2 Texas      6.926629e+11 [m^2]
 3 California 4.105170e+11 [m^2]
 4 Montana    3.806071e+11 [m^2]
 5 New Mexico 3.149147e+11 [m^2]
 6 Arizona    2.952381e+11 [m^2]
 7 Nevada     2.863439e+11 [m^2]
 8 Colorado   2.695815e+11 [m^2]
 9 Wyoming    2.533501e+11 [m^2]
10 Oregon     2.513754e+11 [m^2]
# … with 42 more rows

sf objects can be plotted with ggplot2

usa_48 %>%
    ggplot() +
    geom_sf()

plot of chunk unnamed-chunk-6

Merging sf objects with tibbles

  • We will use data from the socviz package which has opioid death rates by states over time
library(socviz)
data(opiates)
head(opiates[,c("adjusted", "abbr")])
# A tibble: 6 x 2
  adjusted abbr 
     <dbl> <chr>
1      0.8 AL   
2      4   AK   
3      4.7 AZ   
4      1.1 AR   
5      4.5 CA   
6      3.7 CO   

Merging sf objects with tibbles

  • The opiates object has the state names in the abbr column, while our usa_48 object has the state names in the STUSPS column
opiates <-
    opiates %>%
    select(-state) %>%
    rename(STUSPS = abbr) %>%
    complete(STUSPS, year)

Merging sf objects with tibbles

  • We can now merge our sf object with this tibble, just as if itself were a tibble
usa_48 <- left_join(usa_48, opiates, by = "STUSPS")
head(usa_48)
Simple feature collection with 6 features and 18 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -124.4096 ymin: 32.53416 xmax: -114.1391 ymax: 42.00925
epsg (SRID):    4269
proj4string:    +proj=longlat +datum=NAD83 +no_defs
# A tibble: 6 x 19
  STATEFP STATENS AFFGEOID GEOID STUSPS NAME  LSAD    ALAND  AWATER  year
  <chr>   <chr>   <chr>    <chr> <chr>  <chr> <chr>   <dbl>   <dbl> <int>
1 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10  1999
2 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10  2000
3 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10  2001
4 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10  2002
5 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10  2003
6 06      017797… 0400000… 06    CA     Cali… 00    4.03e11 2.05e10  2004
# … with 9 more variables: fips <int>, deaths <int>, population <int>,
#   crude <dbl>, adjusted <dbl>, adjusted_se <dbl>, region <ord>,
#   division_name <chr>, geometry <MULTIPOLYGON [°]>

We can now plot the opioid death rates over time

usa_48 %>%
    filter(year > 1999) %>%
    ggplot(aes(fill = adjusted)) +
    geom_sf() +
    facet_wrap(~year, ncol = 4) +
    scale_fill_viridis_c(option = "plasma") +
    theme(legend.position = "bottom",
          strip.background = element_blank()) +
    labs(fill = "Death rate per 100,000 population ",
         title = "Opiate Related Deaths by State, 2000-2014")  

We can now plot the opioid death rates over time

plot of chunk unnamed-chunk-11

Adding animination with the gganiminate package

library(gganimate)
usa_48 %>%
    filter(year > 1999) %>%
    ggplot(aes(fill = adjusted)) +
    geom_sf()
    scale_fill_viridis_c(option = "plasma") +
    labs(fill = "Death rate per 100,000 population ",
         title = "Opiate Related Deaths by State, {frame_time}") +
    transition_time(as.integer(year)) +
    ease_aes('bounce-in-out')

Adding animination with the gganiminate package

Another example

Downloading crimes data

  • Data available from Open Baltimore
  • Has data on crimes in Baltimore from Jan. 2015-Feb. 2019
  • Constantly updated
library(sf)
library(tidyverse)
library(lubridate)
crimes <- read.csv("data/BPD_Part_1_Victim_Based_Crime_Data.csv")
shootings <-
    crimes %>%
    mutate(CrimeDate = as.character(CrimeDate),
           CrimeDate = mdy(CrimeDate),
           month = month(CrimeDate),
           year = year(CrimeDate)) %>%
    filter(!is.na(Latitude), !is.na(Longitude), Description == "SHOOTING") %>%
    select(Latitude, Longitude, month, year)

Can use the sf package to transform spatial data frames to sf objects

shootings_sf <- st_as_sf(shootings, coords = c("Longitude", "Latitude"))
st_geometry(shootings_sf)
Geometry set for 2764 features 
geometry type:  POINT
dimension:      XY
bbox:           xmin: -76.70956 ymin: 39.22226 xmax: -76.53015 ymax: 39.37195
epsg (SRID):    NA
proj4string:    NA
First 5 geometries:
POINT (-76.55572 39.32584)
POINT (-76.59638 39.31567)
POINT (-76.65384 39.30471)
POINT (-76.63653 39.30819)
POINT (-76.63653 39.30819)

Downloading Baltimore neighborhood shape file

  • Data also available from Baltimore Open Data
neighborhoods <-
    read_sf("data/baltimore_neighborhoods/Neighborhoods.shp") %>%
    select(Name)
st_geometry(neighborhoods)
Geometry set for 278 features 
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 1393927 ymin: 557733.6 xmax: 1445503 ymax: 621406.8
epsg (SRID):    NA
proj4string:    +proj=lcc +lat_1=38.3 +lat_2=39.45 +lat_0=37.66666666666666 +lon_0=-77 +x_0=399999.9999999999 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
First 5 geometries:
MULTIPOLYGON (((1422345 603620.8, 1422192 60361...
MULTIPOLYGON (((1404990 592042, 1404990 592036....
MULTIPOLYGON (((1434377 608229.7, 1434487 60820...
MULTIPOLYGON (((1401059 612450.6, 1401005 61251...
MULTIPOLYGON (((1437179 597502.8, 1437145 59753...

Have to fix the projection to latitude/longitutde

neighborhoods <- st_transform(neighborhoods, "+proj=longlat")
st_geometry(neighborhoods)
Geometry set for 278 features 
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -76.71141 ymin: 39.19723 xmax: -76.52967 ymax: 39.372
epsg (SRID):    4326
proj4string:    +proj=longlat +ellps=WGS84 +no_defs
First 5 geometries:
MULTIPOLYGON (((-76.61113 39.32344, -76.61167 3...
MULTIPOLYGON (((-76.67263 39.29184, -76.67262 3...
MULTIPOLYGON (((-76.56852 39.33594, -76.56814 3...
MULTIPOLYGON (((-76.68626 39.3479, -76.68646 39...
MULTIPOLYGON (((-76.5588 39.30646, -76.55892 39...

Useful operation from the sf package

st_intersection in practice

st_crs(shootings_sf) <- st_crs(neighborhoods)
shootings_in_bmore_neighborhoods <-
    shootings_sf %>%
    st_crop(st_bbox(neighborhoods)) %>%
    st_intersection(neighborhoods)
shootings_in_bmore_neighborhoods
Simple feature collection with 2764 features and 3 fields
geometry type:  POINT
dimension:      XY
bbox:           xmin: -76.70956 ymin: 39.22226 xmax: -76.53015 ymax: 39.37195
epsg (SRID):    4326
proj4string:    +proj=longlat +ellps=WGS84 +no_defs
First 10 features:
     month year      Name                   geometry
172     11 2018 Allendale POINT (-76.67467 39.28939)
566      5 2018 Allendale POINT (-76.67961 39.29269)
684      3 2018 Allendale POINT (-76.67817 39.28942)
685      3 2018 Allendale POINT (-76.67817 39.28942)
686      3 2018 Allendale POINT (-76.67817 39.28942)
1073     8 2017 Allendale POINT (-76.67352 39.29022)
1136     7 2017 Allendale POINT (-76.68107 39.29169)
1137     7 2017 Allendale POINT (-76.68107 39.29169)
1319     3 2017 Allendale POINT (-76.67544 39.29144)
1320     3 2017 Allendale POINT (-76.67544 39.29144)

Plotting the exact locations of shootings

ggplot() +
    geom_sf(data = neighborhoods) +
    geom_sf(data = shootings_in_bmore_neighborhoods, alpha = .4)

Plotting the exact locations of shootings

Aggregating the number of shootings by neighborhood using dplyr

st_geometry(shootings_in_bmore_neighborhoods) <- NULL
shootings_by_neighborhood <-
    shootings_in_bmore_neighborhoods %>%
    group_by(Name) %>%
    summarize(nshootings = n()) 

Aggregating the number of shootings by neighborhood using dplyr

neighborhoods <-
    left_join(neighborhoods, shootings_by_neighborhood, by = "Name")
neighborhoods
Simple feature collection with 278 features and 2 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -76.71141 ymin: 39.19723 xmax: -76.52967 ymax: 39.372
epsg (SRID):    4326
proj4string:    +proj=longlat +ellps=WGS84 +no_defs
# A tibble: 278 x 3
   Name            nshootings                                      geometry
   <chr>                <int>                            <MULTIPOLYGON [°]>
 1 Abell                   NA (((-76.61113 39.32344, -76.61167 39.32343, -…
 2 Allendale               21 (((-76.67263 39.29184, -76.67262 39.29182, -…
 3 Arcadia                  4 (((-76.56852 39.33594, -76.56814 39.33588, -…
 4 Arlington               21 (((-76.68626 39.3479, -76.68646 39.3481, -76…
 5 Armistead Gard…          2 (((-76.5588 39.30646, -76.55892 39.30654, -7…
 6 Ashburton                5 (((-76.67465 39.32513, -76.67556 39.3254, -7…
 7 Baltimore High…         15 (((-76.56954 39.29424, -76.56954 39.29431, -…
 8 Patterson Park…          3 (((-76.57253 39.29477, -76.57205 39.2948, -7…
 9 Barclay                 23 (((-76.60948 39.31215, -76.60948 39.31206, -…
10 Barre Circle             2 (((-76.62756 39.28383, -76.62783 39.28382, -…
# … with 268 more rows

And the final plot!

bmore_shootings <-
    neighborhoods %>%
    mutate(nshootings = ifelse(is.na(nshootings), 0, nshootings)) %>%
    ggplot(aes(fill = nshootings)) +
    geom_sf()  +
    scale_fill_viridis_c(option = "plasma") +
    guides(fill=guide_legend(title=NULL)) +
    ggtitle("Number of shootings by neighborhood \nJan. 2015 - Feb. 2019")

And the final plot!